In [2]:
%load_ext autoreload
%autoreload 2
In [18]:
from lib import *
from pipeline_presets import *
from grid_search_presets import *

Data Loading, Set Splitting Functions, Data Resampling¶

In [4]:
# Load breast cancer dataset
cancer = load_breast_cancer()
cancer_df = pd.DataFrame(data=cancer.data, columns=cancer.feature_names)
cancer_df['target'] = cancer.target

# Preview
display(cancer_df.head())
display(cancer_df.describe())
mean radius mean texture mean perimeter mean area mean smoothness mean compactness mean concavity mean concave points mean symmetry mean fractal dimension ... worst texture worst perimeter worst area worst smoothness worst compactness worst concavity worst concave points worst symmetry worst fractal dimension target
0 17.99 10.38 122.80 1001.0 0.11840 0.27760 0.3001 0.14710 0.2419 0.07871 ... 17.33 184.60 2019.0 0.1622 0.6656 0.7119 0.2654 0.4601 0.11890 0
1 20.57 17.77 132.90 1326.0 0.08474 0.07864 0.0869 0.07017 0.1812 0.05667 ... 23.41 158.80 1956.0 0.1238 0.1866 0.2416 0.1860 0.2750 0.08902 0
2 19.69 21.25 130.00 1203.0 0.10960 0.15990 0.1974 0.12790 0.2069 0.05999 ... 25.53 152.50 1709.0 0.1444 0.4245 0.4504 0.2430 0.3613 0.08758 0
3 11.42 20.38 77.58 386.1 0.14250 0.28390 0.2414 0.10520 0.2597 0.09744 ... 26.50 98.87 567.7 0.2098 0.8663 0.6869 0.2575 0.6638 0.17300 0
4 20.29 14.34 135.10 1297.0 0.10030 0.13280 0.1980 0.10430 0.1809 0.05883 ... 16.67 152.20 1575.0 0.1374 0.2050 0.4000 0.1625 0.2364 0.07678 0

5 rows × 31 columns

mean radius mean texture mean perimeter mean area mean smoothness mean compactness mean concavity mean concave points mean symmetry mean fractal dimension ... worst texture worst perimeter worst area worst smoothness worst compactness worst concavity worst concave points worst symmetry worst fractal dimension target
count 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 ... 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000
mean 14.127292 19.289649 91.969033 654.889104 0.096360 0.104341 0.088799 0.048919 0.181162 0.062798 ... 25.677223 107.261213 880.583128 0.132369 0.254265 0.272188 0.114606 0.290076 0.083946 0.627417
std 3.524049 4.301036 24.298981 351.914129 0.014064 0.052813 0.079720 0.038803 0.027414 0.007060 ... 6.146258 33.602542 569.356993 0.022832 0.157336 0.208624 0.065732 0.061867 0.018061 0.483918
min 6.981000 9.710000 43.790000 143.500000 0.052630 0.019380 0.000000 0.000000 0.106000 0.049960 ... 12.020000 50.410000 185.200000 0.071170 0.027290 0.000000 0.000000 0.156500 0.055040 0.000000
25% 11.700000 16.170000 75.170000 420.300000 0.086370 0.064920 0.029560 0.020310 0.161900 0.057700 ... 21.080000 84.110000 515.300000 0.116600 0.147200 0.114500 0.064930 0.250400 0.071460 0.000000
50% 13.370000 18.840000 86.240000 551.100000 0.095870 0.092630 0.061540 0.033500 0.179200 0.061540 ... 25.410000 97.660000 686.500000 0.131300 0.211900 0.226700 0.099930 0.282200 0.080040 1.000000
75% 15.780000 21.800000 104.100000 782.700000 0.105300 0.130400 0.130700 0.074000 0.195700 0.066120 ... 29.720000 125.400000 1084.000000 0.146000 0.339100 0.382900 0.161400 0.317900 0.092080 1.000000
max 28.110000 39.280000 188.500000 2501.000000 0.163400 0.345400 0.426800 0.201200 0.304000 0.097440 ... 49.540000 251.200000 4254.000000 0.222600 1.058000 1.252000 0.291000 0.663800 0.207500 1.000000

8 rows × 31 columns

In [5]:
def get_train_test_cancer(cancer_df, test_size=0.2, random_state = global_random_state, stratify=True, resample=True) -> tuple:
    """
    Returns a tuple of (X_train, X_test, y_train, y_test) for the diabetes dataset.
    """
    
    # Split the data into features and target variable
    X = cancer_df.drop(columns=['target'])
    y = cancer_df['target']
    
    # Split the dataset into training and testing sets
    
    if stratify:
        return train_test_split(X, y, random_state=random_state, test_size=test_size, stratify=y)
    else:
        return train_test_split(X, y, random_state=random_state, test_size=0.2)
In [7]:
X_train, X_test, y_train, y_test = get_train_test_cancer(cancer_df, test_size=0.2, random_state=global_random_state, stratify=True, resample=True)

Exploratory Data Analysis¶

Pairplot (Only between means)¶

In [40]:
# Make pairplots between all features with the target variable being outcome
cancer_df_means = cancer_df[[col for col in cancer_df.columns if col.startswith("mean") or col == "target"]]
sns.pairplot(cancer_df_means, hue='target', markers='o', height=2.5, aspect=1.2, plot_kws={'s': 20})
Out[40]:
<seaborn.axisgrid.PairGrid at 0x2b8369090>
No description has been provided for this image

Pairplot (Only between worsts)¶

In [39]:
# Make pairplots between all features with the target variable being outcome
cancer_df_worsts = cancer_df[[col for col in cancer_df.columns if col.startswith("worst") or col == "target"]]
sns.pairplot(cancer_df_means, hue='target', markers='o', height=2.5, aspect=1.2, plot_kws={'s': 20})
Out[39]:
<seaborn.axisgrid.PairGrid at 0x2a4b49180>
No description has been provided for this image

Correlation Plot¶

In [ ]:
# Make a correlation heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(cancer_df.corr(), annot=True, fmt='.3f', cmap='coolwarm', square=True, cbar_kws={"shrink": .8}, annot_kws={"size": 5}, linewidths=0.5, linecolor='black')
# 
Out[ ]:
<Axes: >
No description has been provided for this image

Countplot¶

In [32]:
#Countplot of the target variable
plt.figure(figsize=(8, 5))
sns.countplot(x='target', data=cancer_df, palette='Set2')
/var/folders/tp/3nd20r2x6ns2txnn5kmx15dr0000gn/T/ipykernel_30431/474471373.py:3: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.countplot(x='target', data=cancer_df, palette='Set2')
Out[32]:
<Axes: xlabel='target', ylabel='count'>
No description has been provided for this image

Feature Distribution Plots¶

In [38]:
# Feature distribution plots
def plot_feature_distribution(df, feature):
    plt.figure(figsize=(10, 5))
    sns.histplot(df[feature], kde=True, bins=30, color='blue')
    plt.title(f'Distribution of {feature}')
    plt.xlabel(feature)
    plt.ylabel('Frequency')
    plt.grid()
    plt.show()

# Plot feature distributions for all features
for feature in cancer_df.columns[:-1]:  # Exclude the target variable 'Outcome'
    plot_feature_distribution(cancer_df, feature)
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

Model Fitting and Scoring¶

Preparation¶

In [ ]:
def grid_search_preset(pipe_source, grid_search_source):
    prep_pipe = get_preprocessing_pipe(scaling=True, preprocessing="PCA")
    pipe = pipe_source(prep_pipe)
    prep_grid = get_preprocessing_grid_search(pipe)
    return grid_search_source(pipe, prep_grid, scoring='f1')

KNN¶

In [8]:
grid_search = grid_search_preset(get_knn_pipe, get_knn_grid_search)

grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_score = grid_search.best_score_

#Print classification report for the best model
print("KNN Report:")
y_pred = grid_search.predict(X_test)
print(classification_report(y_test, y_pred))

#Print Confusion Matrix
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Print the best parameters and score
print("Best Parameters:", best_params)
print("F1 Score of Best Model:", best_score)

knn_grid_search = grid_search
KNN Report:
              precision    recall  f1-score   support

           0       0.87      0.93      0.90        42
           1       0.96      0.92      0.94        72

    accuracy                           0.92       114
   macro avg       0.91      0.92      0.92       114
weighted avg       0.92      0.92      0.92       114

Confusion Matrix:
[[39  3]
 [ 6 66]]
Best Parameters: {'KNN__algorithm': 'auto', 'KNN__n_neighbors': 7, 'KNN__weights': 'uniform', 'PCA__n_components': 5, 'PCA__random_state': 42, 'Scaler': StandardScaler()}
F1 Score of Best Model: 0.9771980285254622

Naive Bayes¶

In [9]:
grid_search = grid_search_preset(get_naive_bayes_pipe, get_naive_bayes_grid_search)

grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_score = grid_search.best_score_

# Print classification report for the best model
print("Naive Bayes Report:")
y_pred = grid_search.predict(X_test)
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("Best Parameters:", best_params)
print("F1 Score of Best Model:", best_score)

naive_bayes_grid_search = grid_search
Naive Bayes Report:
              precision    recall  f1-score   support

           0       0.84      0.88      0.86        42
           1       0.93      0.90      0.92        72

    accuracy                           0.89       114
   macro avg       0.88      0.89      0.89       114
weighted avg       0.90      0.89      0.90       114

Confusion Matrix:
[[37  5]
 [ 7 65]]
Best Parameters: {'PCA__n_components': 5, 'PCA__random_state': 42, 'Scaler': StandardScaler(), 'clf': GaussianNB(), 'clf__var_smoothing': 1e-09}
F1 Score of Best Model: 0.9547040124626331

Logistic Regression¶

In [10]:
# "UserWarning: Setting penalty='None' will ignore the C and l1_ratio parameters"
# Repeated warnings coming from logreg grid search can be suppressed with this file 
# Penalty=None is not supported for the liblinear solver, so 1/4 fail, but that's okay.
from remove_warnings import *

with suppress_stdout_stderr():
    warnings.filterwarnings('ignore', category=UserWarning)
    
    grid_search = grid_search_preset(get_log_reg_pipe, get_log_reg_grid_search)
    
    grid_search.fit(X_train, y_train)
    
best_params = grid_search.best_params_
best_score = grid_search.best_score_

# Print classification report for the best model
print("Logistic Regression Report:")
y_pred = grid_search.predict(X_test)
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("Best Parameters:", best_params)
print("F1 Score of Best Model:", best_score)

log_reg_grid_search = grid_search
Logistic Regression Report:
              precision    recall  f1-score   support

           0       0.91      0.95      0.93        42
           1       0.97      0.94      0.96        72

    accuracy                           0.95       114
   macro avg       0.94      0.95      0.94       114
weighted avg       0.95      0.95      0.95       114

Confusion Matrix:
[[40  2]
 [ 4 68]]
Best Parameters: {'LogisticRegression__C': 1, 'LogisticRegression__penalty': 'l2', 'LogisticRegression__solver': 'liblinear', 'PCA__n_components': 5, 'PCA__random_state': 42, 'Scaler': StandardScaler()}
F1 Score of Best Model: 0.98068486433475

Decision Trees¶

In [11]:
grid_search = grid_search_preset(get_decision_tree_pipe, get_decision_tree_grid_search)

grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_score = grid_search.best_score_

# Print classification report for the best model
print("Decision Tree Report:")
y_pred = grid_search.predict(X_test)
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("Best Parameters:", best_params)
print("F1 Score of Best Model:", best_score)

decision_tree_grid_search = grid_search
Decision Tree Report:
              precision    recall  f1-score   support

           0       0.84      0.98      0.90        42
           1       0.98      0.89      0.93        72

    accuracy                           0.92       114
   macro avg       0.91      0.93      0.92       114
weighted avg       0.93      0.92      0.92       114

Confusion Matrix:
[[41  1]
 [ 8 64]]
Best Parameters: {'DecisionTree__criterion': 'gini', 'DecisionTree__max_depth': 5, 'DecisionTree__min_samples_leaf': 1, 'DecisionTree__min_samples_split': 5, 'PCA__n_components': 3, 'PCA__random_state': 42, 'Scaler': StandardScaler()}
F1 Score of Best Model: 0.9609786791365739

Support Vector Machines¶

Linear Support Vector Machines (Faster: able to utilize larger grid search space)¶

In [12]:
grid_search = grid_search_preset(get_lin_svm_pipe, get_lin_svm_grid_search)

grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_score = grid_search.best_score_

# Print classification report for the best model
print("Linear SVM Report:")
y_pred = grid_search.predict(X_test)
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("Best Parameters:", best_params)
print("F1 Score of Best Model:", best_score)

linear_svm_grid_search = grid_search
Linear SVM Report:
              precision    recall  f1-score   support

           0       0.91      0.95      0.93        42
           1       0.97      0.94      0.96        72

    accuracy                           0.95       114
   macro avg       0.94      0.95      0.94       114
weighted avg       0.95      0.95      0.95       114

Confusion Matrix:
[[40  2]
 [ 4 68]]
Best Parameters: {'PCA__n_components': 5, 'PCA__random_state': 42, 'SVM__C': 0.1, 'SVM__penalty': 'l1', 'Scaler': StandardScaler()}
F1 Score of Best Model: 0.9788528506722752

Nonlinear / Kernel Support Vector Machines¶

In [13]:
grid_search = grid_search_preset(get_kernel_svm_pipe, get_kernel_svm_grid_search)

grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Kernel SVM Report:")
y_pred = grid_search.predict(X_test)
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("Best Parameters:", best_params)
print("F1 Score of Best Model:", best_score)

kernel_svm_grid_search = grid_search
Kernel SVM Report:
              precision    recall  f1-score   support

           0       0.91      0.95      0.93        42
           1       0.97      0.94      0.96        72

    accuracy                           0.95       114
   macro avg       0.94      0.95      0.94       114
weighted avg       0.95      0.95      0.95       114

Confusion Matrix:
[[40  2]
 [ 4 68]]
Best Parameters: {'PCA__n_components': 5, 'PCA__random_state': 42, 'SVM__C': 10, 'SVM__gamma': 'scale', 'SVM__kernel': 'rbf', 'SVM__probability': False, 'Scaler': StandardScaler()}
F1 Score of Best Model: 0.9806508566197307

Final Score Comparison¶

In [16]:
# Final score comparison
print("\n===Final F1 Score Comparison===\n")
print(f"KNN:{round(knn_grid_search.best_score_ * 100, 2)}%")
print(f"Naive Bayes:{round(naive_bayes_grid_search.best_score_ * 100, 2)}%")
print(f"Logistic Regression:{round(log_reg_grid_search.best_score_ * 100, 2)}%")
print(f"Decision Tree:{round(decision_tree_grid_search.best_score_ * 100, 2)}%")
print(f"Linear SVM:{round(linear_svm_grid_search.best_score_ * 100, 2)}%")
print(f"Kernel SVM:{round(kernel_svm_grid_search.best_score_ * 100, 2)}%")
===Final F1 Score Comparison===

KNN:97.72%
Naive Bayes:95.47%
Logistic Regression:98.07%
Decision Tree:96.1%
Linear SVM:97.89%
Kernel SVM:98.07%

Time Reports for all training¶

In [15]:
print("=========KNN==========")
print_time_report(knn_grid_search)
print("=========Naive Bayes==========")
print_time_report(naive_bayes_grid_search)
print("=========Logistic Regression==========")
print_time_report(log_reg_grid_search)
print("=========Decision Tree==========")
print_time_report(decision_tree_grid_search)
print("=========Linear SVM==========")
print_time_report(linear_svm_grid_search)
print("=========Kernel SVM==========")
print_time_report(kernel_svm_grid_search)
=========KNN==========

=== Training Time by 'Scaler' ===
param_Scaler
StandardScaler()    0.012959
Name: mean_fit_time, dtype: float64

=== Training Time by 'PCA__n_components' ===
param_PCA__n_components
4    0.010859
2    0.011056
3    0.013773
5    0.015075
Name: mean_fit_time, dtype: float64

=== Training Time by 'PCA__random_state' ===
param_PCA__random_state
42    0.012691
Name: mean_fit_time, dtype: float64

=== Training Time by 'KNN__n_neighbors' ===
param_KNN__n_neighbors
3    0.009548
5    0.009905
9    0.014441
7    0.016870
Name: mean_fit_time, dtype: float64

=== Training Time by 'KNN__weights' ===
param_KNN__weights
distance    0.011920
uniform     0.013462
Name: mean_fit_time, dtype: float64

=== Training Time by 'KNN__algorithm' ===
param_KNN__algorithm
kd_tree      0.009392
ball_tree    0.010237
brute        0.010730
auto         0.020405
Name: mean_fit_time, dtype: float64
=========Naive Bayes==========

====== Training Time by Group 1 ======

=== Training Time by 'Scaler' ===
param_Scaler
StandardScaler()    0.008058
Name: mean_fit_time, dtype: float64

=== Training Time by 'PCA__n_components' ===
param_PCA__n_components
2    0.005977
5    0.006800
4    0.008677
3    0.008859
Name: mean_fit_time, dtype: float64

=== Training Time by 'PCA__random_state' ===
param_PCA__random_state
42    0.007578
Name: mean_fit_time, dtype: float64

=== Training Time by 'clf' ===
param_clf
GaussianNB()    0.007578
Name: mean_fit_time, dtype: float64

=== Training Time by 'clf__var_smoothing' ===
param_clf__var_smoothing
1.000000e-09    0.007111
1.000000e-07    0.007402
1.000000e-08    0.008222
Name: mean_fit_time, dtype: float64

====== Training Time by Group 2 ======

=== Training Time by 'Scaler' ===
param_Scaler
StandardScaler()    0.009551
Name: mean_fit_time, dtype: float64

=== Training Time by 'PCA__n_components' ===
param_PCA__n_components
5    0.007317
4    0.007401
2    0.009341
3    0.009866
Name: mean_fit_time, dtype: float64

=== Training Time by 'PCA__random_state' ===
param_PCA__random_state
42    0.008481
Name: mean_fit_time, dtype: float64

=== Training Time by 'clf' ===
param_clf
BernoulliNB()    0.008481
Name: mean_fit_time, dtype: float64

=== Training Time by 'clf__alpha' ===
param_clf__alpha
0.5    0.008071
1.0    0.008891
Name: mean_fit_time, dtype: float64

=== Training Time by 'clf__binarize' ===
param_clf__binarize
0.0    0.008322
0.5    0.008640
Name: mean_fit_time, dtype: float64
=========Logistic Regression==========

=== Training Time by 'Scaler' ===
param_Scaler
StandardScaler()    0.004787
Name: mean_fit_time, dtype: float64

=== Training Time by 'PCA__n_components' ===
param_PCA__n_components
2    0.004633
3    0.004845
4    0.005006
5    0.005215
Name: mean_fit_time, dtype: float64

=== Training Time by 'PCA__random_state' ===
param_PCA__random_state
42    0.004925
Name: mean_fit_time, dtype: float64

=== Training Time by 'LogisticRegression__C' ===
param_LogisticRegression__C
0.01      0.004727
10.00     0.004782
100.00    0.004839
0.10      0.005043
1.00      0.005231
Name: mean_fit_time, dtype: float64

=== Training Time by 'LogisticRegression__penalty' ===
param_LogisticRegression__penalty
l2    0.004878
Name: mean_fit_time, dtype: float64

=== Training Time by 'LogisticRegression__solver' ===
param_LogisticRegression__solver
liblinear    0.003953
lbfgs        0.005896
Name: mean_fit_time, dtype: float64
=========Decision Tree==========

=== Training Time by 'Scaler' ===
param_Scaler
StandardScaler()    0.017469
Name: mean_fit_time, dtype: float64

=== Training Time by 'PCA__n_components' ===
param_PCA__n_components
2    0.014665
3    0.015257
4    0.017284
5    0.017388
Name: mean_fit_time, dtype: float64

=== Training Time by 'PCA__random_state' ===
param_PCA__random_state
42    0.016148
Name: mean_fit_time, dtype: float64

=== Training Time by 'DecisionTree__criterion' ===
param_DecisionTree__criterion
entropy    0.015955
gini       0.016342
Name: mean_fit_time, dtype: float64

=== Training Time by 'DecisionTree__max_depth' ===
param_DecisionTree__max_depth
15    0.013242
10    0.016003
5     0.017335
Name: mean_fit_time, dtype: float64

=== Training Time by 'DecisionTree__min_samples_split' ===
param_DecisionTree__min_samples_split
2     0.015987
10    0.016134
5     0.016324
Name: mean_fit_time, dtype: float64

=== Training Time by 'DecisionTree__min_samples_leaf' ===
param_DecisionTree__min_samples_leaf
2    0.015004
1    0.015848
4    0.017592
Name: mean_fit_time, dtype: float64
=========Linear SVM==========

=== Training Time by 'Scaler' ===
param_Scaler
StandardScaler()    0.011021
Name: mean_fit_time, dtype: float64

=== Training Time by 'PCA__n_components' ===
param_PCA__n_components
2    0.008914
3    0.010188
5    0.010468
4    0.011788
Name: mean_fit_time, dtype: float64

=== Training Time by 'PCA__random_state' ===
param_PCA__random_state
42    0.01034
Name: mean_fit_time, dtype: float64

=== Training Time by 'SVM__C' ===
param_SVM__C
1.000      0.009771
10.000     0.009981
0.001      0.010090
0.100      0.010398
50.000     0.010508
0.010      0.010739
100.000    0.010890
Name: mean_fit_time, dtype: float64

=== Training Time by 'SVM__penalty' ===
param_SVM__penalty
l1    0.010072
l2    0.010607
Name: mean_fit_time, dtype: float64
=========Kernel SVM==========

====== Training Time by Group 1 ======

=== Training Time by 'Scaler' ===
param_Scaler
StandardScaler()    0.013573
Name: mean_fit_time, dtype: float64

=== Training Time by 'PCA__n_components' ===
param_PCA__n_components
4    0.025761
5    0.026543
3    0.030225
2    0.034988
Name: mean_fit_time, dtype: float64

=== Training Time by 'PCA__random_state' ===
param_PCA__random_state
42    0.029379
Name: mean_fit_time, dtype: float64

=== Training Time by 'SVM__C' ===
param_SVM__C
100.00    0.009573
50.00     0.009957
10.00     0.010370
5.00      0.010946
0.05      0.013030
0.01      0.015324
0.10      0.019631
1.00      0.079760
Name: mean_fit_time, dtype: float64

=== Training Time by 'SVM__kernel' ===
param_SVM__kernel
sigmoid    0.011195
rbf        0.012176
poly       0.012911
linear     0.189618
Name: mean_fit_time, dtype: float64

=== Training Time by 'SVM__probability' ===
param_SVM__probability
False    0.029379
Name: mean_fit_time, dtype: float64

=== Training Time by 'SVM__gamma' ===
param_SVM__gamma
0.0      0.012911
scale    0.039780
Name: mean_fit_time, dtype: float64

====== Training Time by Group 2 ======

=== Training Time by 'Scaler' ===
param_Scaler
StandardScaler()    0.013573
Name: mean_fit_time, dtype: float64

=== Training Time by 'PCA__n_components' ===
param_PCA__n_components
4    0.025761
5    0.026543
3    0.030225
2    0.034988
Name: mean_fit_time, dtype: float64

=== Training Time by 'PCA__random_state' ===
param_PCA__random_state
42    0.029379
Name: mean_fit_time, dtype: float64

=== Training Time by 'SVM__C' ===
param_SVM__C
100.00    0.009573
50.00     0.009957
10.00     0.010370
5.00      0.010946
0.05      0.013030
0.01      0.015324
0.10      0.019631
1.00      0.079760
Name: mean_fit_time, dtype: float64

=== Training Time by 'SVM__kernel' ===
param_SVM__kernel
sigmoid    0.011195
rbf        0.012176
poly       0.012911
linear     0.189618
Name: mean_fit_time, dtype: float64

=== Training Time by 'SVM__probability' ===
param_SVM__probability
False    0.029379
Name: mean_fit_time, dtype: float64

=== Training Time by 'SVM__gamma' ===
param_SVM__gamma
0.0      0.012911
scale    0.039780
Name: mean_fit_time, dtype: float64

====== Training Time by Group 3 ======

=== Training Time by 'Scaler' ===
param_Scaler
StandardScaler()    0.013828
Name: mean_fit_time, dtype: float64

=== Training Time by 'PCA__n_components' ===
param_PCA__n_components
3    0.011362
2    0.011795
5    0.013989
4    0.014499
Name: mean_fit_time, dtype: float64

=== Training Time by 'PCA__random_state' ===
param_PCA__random_state
42    0.012911
Name: mean_fit_time, dtype: float64

=== Training Time by 'SVM__C' ===
param_SVM__C
0.10    0.012138
0.01    0.013002
1.00    0.013595
Name: mean_fit_time, dtype: float64

=== Training Time by 'SVM__kernel' ===
param_SVM__kernel
poly    0.012911
Name: mean_fit_time, dtype: float64

=== Training Time by 'SVM__degree' ===
param_SVM__degree
3.0    0.012437
2.0    0.012645
4.0    0.012986
5.0    0.013577
Name: mean_fit_time, dtype: float64

=== Training Time by 'SVM__probability' ===
param_SVM__probability
False    0.012911
Name: mean_fit_time, dtype: float64

=== Training Time by 'SVM__gamma' ===
param_SVM__gamma
1.000000e-09    0.012911
Name: mean_fit_time, dtype: float64
In [ ]: